*******************
* Constructing HH outcomes for Global South COVID paper
* SIAYA HH Survey
* 2020-06-23
* Originally by Magdalena Larreboure, 2020-06-23
* Updated by Michael Walker, 2020-07-24
* Updated by Magdalena Larreboure, 2020-09-09
*******************

/************************************/
/* begin outcome construction 			*/
/************************************/


* Turn off output paging for the rest of the script
set more off
foreach round in "Rd1" "Rd2" {
// use "$dr1/COVID_HH_`round'.dta", clear
// keep if surveystatus==1

// * Convert date of survey to numeric date
// gen today_num = survey_date
// drop if today_num<22000 | week>14
// gen today_date = today_num
// format today_date %td
// tostring today_date, replace force usedisplayformat

// if "`round'"=="Rd1"{
// keep current_num_* s5_q39* s5_q40* s5_q41* s5_q40_hardship s5_q24_whoemployed s5_q29_wholaidoff s5_q30b_whenlaidoff_* s5_q35a_value s5_q36a_value s5_q37a_value s5_q35_govthelp s5_q36_ngohelp s5_q37_politicianhelp hhid_key hh_weight week surveystatus s9_fogender s4_q3a_food s4_q1_consumptionamt s5_q8_ownconsumption s4_q2a_foodamt s4_q2_receivedfood s4_q3b_personal s4_q3c_durables s4_q3d_services s4_q3e_comms s4_q3f_housing s4_q3g_utilities s4_q3h_transport s4_q3i_medical formdef_version s9_q4_threaten s9_q5_physicallyhurt s9_q6_sexacts s9_q7a_beatkids weightgroup
// }

// if "`round'"=="Rd2"{
// keep current_num_* hhid_key hh_weight week surveystatus s5_q39* s5_q40* s5_q41* s4_q3a_food s4_q1_consumptionamt s4_q2a_foodamt s4_q2_receivedfood s4_q3b_personal s4_q3c_durables s4_q3d_services s4_q3e_comms s4_q3f_housing s4_q3g_utilities s4_q3h_transport s4_q3i_medical formdef_version s9_q4_threaten s9_q5_physicallyhurt s9_q6_sexacts s9_q7a_beatkids weightgroup s9_fogender
// }
// gen newid=_n
// save "$dr/KEN1_raw_`round'.dta" , replace

* Load the trimmed raw survey extract for the round currently being processed
use "$dr/KEN1_raw_`round'.dta", clear

* Household size = adults + young children + school-age children.
* rowtotal() counts a missing component as zero, so n_hhmem is never missing.
local hhcounts current_num_adults current_num_youngchildren current_num_schoolchildren
destring `hhcounts', replace
egen n_hhmem = rowtotal(`hhcounts')
label variable n_hhmem "Number of household members, including children"


********************************************************************************************************
							******************* Food security *******************
********************************************************************************************************

// - Missed meals
// - Reduced portions


* "How many days out of the past 7" food-security items (qs 42-44 in the paper version).
local fdays s5_q39a_hungryadult s5_q39b_hungrychild s5_q40a_skippedadult s5_q40b_skippedchild s5_q41a_nofoodadult s5_q41b_nofoodchild
foreach item of varlist `fdays' {
	tab `item'
	* drop the 8-character question prefix (e.g. "s5_q39a_") to get the short outcome name
	local short = substr("`item'", 9, 12)
	* copy the answer, leaving the -99 code as missing
	gen f1_d_`short' = `item' if `item' != -99
}

* Variable labels (original question wording kept alongside where the script recorded it)
label variable f1_d_hungryadult  "Number of days adults have gone to bed hungry in the past 7 days"
* 42b. In the past 7 days, how many days have CHILDREN in your household gone to bed hungry?
label variable f1_d_hungrychild  "Number of days children have gone to bed hungry in the past 7 days"
* 43a. In the past 7 days, how many days have ADULTS in your household skipped meals or cut the amount of meals?
label variable f1_d_skippedadult "Number of days adults have skipped meals in the past 7 days"
label variable f1_d_skippedchild "Number of days children have skipped meals in the past 7 days"
* 44a. In the past 7 days, how many days have ADULTS in your household gone entire days without food?
label variable f1_d_nofoodadult  "Number of days adults have gone the entire day without food"
label variable f1_d_nofoodchild  "Number of days children have gone the entire day without food"

tab1 f1_d_skippedadult f1_d_skippedchild, m

* missedmeals = 1 if anyone skipped meals or went a whole day without food on 1-7 days,
*             = 0 only when all four day counts are exactly zero, missing otherwise.
gen missedmeals = 1 if inrange(f1_d_skippedadult, 1, 7) | inrange(f1_d_skippedchild, 1, 7) ///
	| inrange(f1_d_nofoodadult, 1, 7) | inrange(f1_d_nofoodchild, 1, 7)
replace missedmeals = 0 if f1_d_skippedadult == 0 & f1_d_skippedchild == 0 ///
	& f1_d_nofoodadult == 0 & f1_d_nofoodchild == 0
label variable missedmeals "Share of sample missing or cutting meals in the past 7 days"

* Stash the skipped-meal day counts (keyed by newid) in a temporary file;
* they are merged back in further down, after the raw data have been reloaded.
tempfile missedmeals
preserve
	keep newid f1_d_skippedadult f1_d_skippedchild
	save `missedmeals'
restore

if "`round'"=="Rd1"{
********************************************************************************************************
							******************* Income *******************
********************************************************************************************************

// - Drop in income
* Hardship code "5" in the multi-select answer = household income has dropped.
* Left missing when the hardship question is blank (it was not asked in early surveys).
gen f1_h_income = (strpos(s5_q40_hardship, "5") > 0) if s5_q40_hardship != ""
label variable f1_h_income "Our household income has dropped"

gen incomedrop = f1_h_income
label variable incomedrop "Share of sample reporting being unable to buy usual amount of food due to a drop in income"

* construct analogue for WB/Refugee version here where we can bring in round 2 data with retrospective questions to feb 2020

********************************************************************************************************
							******************* Employment *******************
********************************************************************************************************

// - Drop in employment

// 1. Share of employed that lost job since February - emp1_lostjob
*tab s5_q24_whoemployed
* 24. Which ADULT household members are currently employed, working for pay?
* The answer is a space-separated list; "-98" is recoded to zero members.
gen nemployed_now = cond(s5_q24_whoemployed == "-98", 0, wordcount(s5_q24_whoemployed)) ///
	if s5_q24_whoemployed != ""

* 29. Since January 2020, which ADULTS in your household were laid off / lost their job?
gen nlaidoff_smarch = cond(s5_q29_wholaidoff == "-98", 0, wordcount(s5_q29_wholaidoff)) ///
	if s5_q29_wholaidoff != ""
* Timing codes 1 and 2 (January / February) on either of the first two layoffs reset the
* household count to zero.
* NOTE(review): this zeroes every layoff in the household, not just the Jan/Feb one -- confirm intended.
replace nlaidoff_smarch = 0 if inlist(s5_q30b_whenlaidoff_1, 1, 2) | inlist(s5_q30b_whenlaidoff_2, 1, 2)

* Employment in March = currently employed + laid off since March
gen nemployed_march = nemployed_now + nlaidoff_smarch
gen share_hh_employed_march = (nemployed_march > 0) if ~mi(nemployed_march)
*summ share_hh_employed_march [aw=hh_weight]

* 1. Share of employed that lost job since March (missing when nobody was employed in March)
gen emp1_lostjob = nlaidoff_smarch / nemployed_march
label variable emp1_lostjob "Share of employed that lost job since March"

*summ emp1_lostjob [aw=hh_weight]

// - Drop in employment hh level

* converting to individual level outcomes
split s5_q24_whoemployed, destring
* split leaves the names of the variables it created in r(varlist). Capture them right away
* so the loop below covers however many answer slots exist, instead of assuming exactly 7.
loc empslots `r(varlist)'

*** generate indicator variables for household members being currently employed ***
// looping through adult household members (roster positions 1-11)
forval i = 1/11 {
	gen curremp`i' = 0 if ~mi(s5_q24_whoemployed)
	foreach slot of local empslots {
		replace curremp`i' = 1 if `slot' == `i' & ~mi(s5_q24_whoemployed)
	}
}

** generate indicators for individual-specific layoffs **
* Only the first two layoff slots are used, paired with the two timing variables
* s5_q30b_whenlaidoff_1 / _2 referenced here.
split s5_q29_wholaidoff, gen(wholaidoff_rd1_) destring
forval i = 1/11 {
	gen laidoff`i' = (wholaidoff_rd1_1 == `i' | wholaidoff_rd1_2 == `i') if ~mi(s5_q29_wholaidoff)
	replace laidoff`i' = 0 if (wholaidoff_rd1_1 == `i' & inlist(s5_q30b_whenlaidoff_1,1,2)) ///
			| (wholaidoff_rd1_2 == `i' & inlist(s5_q30b_whenlaidoff_2,1,2)) // don't count jan / feb layoffs
}

* A member "lost a job" if laid off (after Feb) and not currently employed.
* Collect the variable names explicitly: the wildcard lostjob? matches only ONE trailing
* character and therefore silently skipped lostjob10 and lostjob11.
loc lostvars
forval i = 1/11 {
	gen lostjob`i' = (laidoff`i' == 1 & curremp`i' == 0) if ~mi(laidoff`i') & ~mi(curremp`i')
	loc lostvars `lostvars' lostjob`i'
}

*summ laidoff* curremp* lostjob*

* Household total of members who lost a job (missing if every member indicator is missing)
egen lostjobs = rowtotal(`lostvars'), m
*tab lostjobs


* Conditional on anyone in the household working for wages pre-COVID.
* Missing values compare as larger than any number in Stata, so lostjobs is checked
* explicitly; otherwise a missing total would be coded as an employment drop.
g employmentdrop = (lostjobs > 0) if nemployed_march > 0 & ~mi(nemployed_march) & ~mi(lostjobs)
label var employmentdrop "Employment drop in the household"

*summ employmentdrop [aw=hh_weight]

********************************************************************************************************
							******************* Access to markets *******************
********************************************************************************************************

// - Reduction in access to markets

* Count how many of the six hardship codes (q40 in paper version) appear in the multi-select answer
gen f1_h = 0
forvalues code = 1/6 {
	replace f1_h = f1_h + 1 if strpos(s5_q40_hardship, "`code'") > 0
}
replace f1_h = . if s5_q40_hardship == ""
replace f1_h = 0 if strpos(s5_q40_hardship, "-98") > 0
label variable f1_h "Number of food hardship experiences (out of 6)"

* Disaggregate: one 0/1 indicator per hardship code, missing when the question is blank
gen f1_h_going = (strpos(s5_q40_hardship, "1") > 0) if s5_q40_hardship != ""
label variable f1_h_going "Difficulties in going to food markets due to mobility restrictions imposed by government"

gen f1_h_closed = (strpos(s5_q40_hardship, "2") > 0) if s5_q40_hardship != ""
label variable f1_h_closed "Difficulties in buying food due to most food markets being closed"

gen f1_h_reduce = (strpos(s5_q40_hardship, "6") > 0) if s5_q40_hardship != ""
label variable f1_h_reduce "Had to reduce the number of meals and/or the portion of each meal we would usually eat"

* Unable to buy the amount of food we usually buy because:
gen f1_h_shortages = (strpos(s5_q40_hardship, "3") > 0) if s5_q40_hardship != ""
label variable f1_h_shortages "Shortages in markets"

gen f1_h_price = (strpos(s5_q40_hardship, "4") > 0) if s5_q40_hardship != ""
label variable f1_h_price "Price of food was too high"

* Reduced market access: any of shortages / mobility restrictions / closed markets.
* 1 if any indicator is 1, 0 only if all three are 0, missing otherwise.
gen accessmarkets = cond(f1_h_shortages == 1 | f1_h_going == 1 | f1_h_closed == 1, 1, ///
	cond(f1_h_shortages == 0 & f1_h_going == 0 & f1_h_closed == 0, 0, .))
label variable accessmarkets "Share of sample experiencing a reduction in market access"

tab accessmarkets

* Reduction in access to markets is defined as 1 if the household experienced any of:
// 1. Difficulties in going to food markets due to mobility restrictions imposed by government
// 2. Difficulties in buying food due to most food markets being closed
// 3. Shortages in markets


********************************************************************************************************
							******************* NGO or GOV support *******************
********************************************************************************************************
// - Received new NGO or Govt Support


// I5: Support from government, NGOs/community groups and politicians: 5.37a + 5.38a + 5.39a
* Transfer amounts and the yes/no questions that gate them, in matching order
local support  s5_q35a_value   s5_q36a_value  s5_q37a_value
local gates    s5_q35_govthelp s5_q36_ngohelp s5_q37_politicianhelp
local support2 s5_q35a_value s5_q36a_value
forvalues k = 1/3 {
	local amt  : word `k' of `support'
	local gate : word `k' of `gates'
	* answer code 2 on the gate question -> amount is zero
	replace `amt' = 0 if `gate' == 2
	* -99 is a non-response code
	replace `amt' = . if `amt' == -99
}

* People are probably reporting the 100Ksh gift for completing the survey
* (~6% reported a 100Ksh transfer amount), so treat exactly 100 as no NGO support.
replace s5_q36a_value = 0 if s5_q36a_value == 100

* Source-specific amounts, converted with the PPP factor
gen i5a = s5_q35a_value/$pppfactor
gen i5b = s5_q36a_value/$pppfactor
gen i5c = s5_q37a_value/$pppfactor
label variable i5a "Support from government"
label variable i5b "Support from NGOs/community groups"
label variable i5c "Support from politicians"

* Government + NGO total (politicians excluded); missing only if both components are missing
egen i5 = rowtotal(`support2'), m
replace i5 = i5/$pppfactor
label variable i5 "Support from government and NGOs/community groups (pc) in the past 14 days"

* 1 if any positive amount, 0 if exactly zero, missing otherwise
gen anysupport = (i5 > 0) if i5 >= 0 & i5 != .
label variable anysupport "Share of sample receiving any support from government or NGOs in the past 14 days"


********************************************************************************************************
* Household-level outcomes that go into the main table
loc outcomes missedmeals incomedrop employmentdrop accessmarkets anysupport

keep hhid_key `outcomes' hh_weight week

* Build the below-median baseline consumption flag from the EL1 data and merge it on.
preserve
	use "$dr/KEN1_raw_EL1.dta" , clear

	* Per-capita consumption expenditure over last 7 days (unweighted median)
	sum c4_EL1 , detail // [aw=$weight]
	* b_cons = 1 at or below the median, 0 above it.
	* Households with missing c4_EL1 stay missing: a plain cond() would code them as 0,
	* i.e. as above-median. r(p50) is used directly rather than through macro text.
	g b_cons = (c4_EL1 <= r(p50)) if !missing(c4_EL1)

	keep hhid_key b_cons
	tempfile el1
	save `el1'
restore
merge 1:1 hhid_key using `el1' , keep(match master) nogen

save "$da/KEN1_maintable.dta" , replace
  }
  ********************************************************************************************************
* Reload the round's raw data and keep surveystatus == 1 only
use "$dr/KEN1_raw_`round'.dta", clear
keep if surveystatus == 1

* Share rural: set to 1 for every household
gen rural = 1

* Household size: plain sum, so it is missing if any component is missing
local hhcounts current_num_youngchildren current_num_schoolchildren current_num_adults
destring `hhcounts', replace
gen hhsize = current_num_adults + current_num_youngchildren + current_num_schoolchildren
label variable hhsize "Number of household members, including children"

* Respondent gender: 1 when s9_fogender is 2, missing when it is missing
gen female = (s9_fogender == 2) if s9_fogender != .

*Educational attainment *Poverty score / average income
* Educational attainment / poverty score / average income come from the EL1 demographics file.
* Prepare it in a temporary file, then merge it onto the survey data by household id.
tempfile el1
preserve
	use "$dr/KEN1_raw_EL1_dem.dta", clear
	rename formschool completesecondary
	rename female_BL  female_el1
	rename hhsize1    hhsize_el1
	* p2_consumption_wins_PPP divided by 52
	gen consumption_el1 = (p2_consumption_wins_PPP/52)
	keep if hhid_key != ""
	save `el1'
restore

* _merge is deliberately kept (and unmatched EL1 households retained):
* it defines the sample flags built further down.
merge 1:1 hhid_key using `el1' //, keep(match master) nogen

********************************************************************************************************
										**** CONSUMPTION ****
********************************************************************************************************
// C1: Total food expenditure in the past 7 days: 4.1.1 + 4.3.Groceries/Food
* In Rd1, fill a missing own-consumption amount from s5_q8_ownconsumption
if "`round'" == "Rd1" {
	replace s4_q1_consumptionamt = s5_q8_ownconsumption if s4_q1_consumptionamt == .
}
* expenditure + own agricultural or pastoral production
local c1 s4_q3a_food s4_q1_consumptionamt
tab1 `c1'
foreach part of varlist `c1' {
	replace `part' = . if `part' == -99
}
egen c1 = rowtotal(`c1'), m
* require both components: blank the total when either one is missing
foreach part of varlist `c1' {
	replace c1 = . if `part' == .
}
replace c1 = c1/$pppfactor
label variable c1 "Total food expenditure in the past 7 days"

gen c1a = s4_q3a_food/$pppfactor
label variable c1a "Food expenditure in the past 7 days"

gen c1b = s4_q1_consumptionamt/$pppfactor
label variable c1b "Own food expenditure in the past 7 days"

// C2: Total food consumption in the past 7 days: C1 +  4.1.2a
tab s4_q2a_foodamt
replace s4_q2a_foodamt = . if s4_q2a_foodamt == -99
* answer code 2 on the received-food question -> amount is zero
replace s4_q2a_foodamt = 0 if s4_q2_receivedfood == 2
gen s4_q2a_foodamt_PPP = s4_q2a_foodamt/$pppfactor
local c2 c1 s4_q2a_foodamt_PPP
egen c2 = rowtotal(`c2'), m
foreach part of varlist `c2' {
	replace c2 = . if `part' == .
}
label variable c2 "Total food consumption (incl. gifts) in the past 7 days"

// C3: Non food expenditure in the past 7 days: 4.3.Household Items + 4.3.Assets + 4.3.Services + 4.3.Communication + 4.3.Housing + 4.3.Energy + 4.3.Transport + 4.3.Medical

* From formdef_version 2004290826 onwards items b-i ask about the past 14 days
* (7 days before that), so those answers are halved to a 7-day basis.
local nfexp s4_q3b_personal s4_q3c_durables s4_q3d_services s4_q3e_comms s4_q3f_housing s4_q3g_utilities s4_q3h_transport s4_q3i_medical
foreach item of varlist `nfexp' {
	tab `item'
	* negative values are the -99 code or what seem like typos
	replace `item' = . if `item' < 0
	* NOTE(review): a missing formdef_version also satisfies >= here -- confirm none exist
	replace `item' = `item'/2 if formdef_version >= 2004290826
}

egen c3 = rowtotal(`nfexp'), m
replace c3 = c3/$pppfactor
* require every component: blank the total when any item is missing
foreach item of varlist `nfexp' {
	replace c3 = . if `item' == .
}
label variable c3 "Non-food expenditure in the past 7 days"

// C4: Total consumption in the past 7 days: C2 + C3
egen c4 = rowtotal(c2 c3), m
foreach part of varlist c2 c3 {
	replace c4 = . if `part' == .
}
label variable c4 "Total consumption in the past 7 days"

* Build the representativeness table on a throwaway copy of the data; it is saved for Rd1 only.
preserve
	keep hh_weight rural rural_el1 hhsize hhsize_el1 female female_el1 yearsedu completesecondary consumption_el1 c4 _merge current_num_adults

	* Sample flags from the EL1 merge: 1 = survey only, 2 = EL1 only, 3 = both
	gen covid_sample = inlist(_merge, 1, 3)
	gen el1_sample   = inlist(_merge, 2, 3)
	drop _merge

	gen yearsedu_el1 = yearsedu
	gen completesecondary_el1 = completesecondary

	* rescale EL1 consumption by 1.967, then use it as the consumption measure
	* so it's not affected by COVID
	replace consumption_el1 = consumption_el1/1.967
	gen consumption = consumption_el1

	if "`round'" == "Rd1" {
		save "$da/KEN1_reptable.dta", replace
	}
restore

**************************
**** VIOLENCE ****
**************************

*Gender violence
* Gender violence items. Codes: 1 never, 2 sometimes, 3 often, -88 refuse
local gv s9_q4_threaten s9_q5_physicallyhurt s9_q6_sexacts s9_q7a_beatkids
local ngv : word count `gv'
forvalues k = 1/`ngv' {
	local item : word `k' of `gv'
	* refusals become missing
	replace `item' = . if `item' == -88
	* gv`k' = 1 for sometimes/often, 0 for never, missing otherwise
	gen gv`k' = cond(inlist(`item', 2, 3), 1, cond(`item' == 1, 0, .))
}

* Share of female respondents, to female FOs. In the past 14 days
label variable gv1 "Partner has threaten to harm them or someone close to them"
label variable gv2 "Partner has hit, slapped, kicked, or physically hurt them"
label variable gv3 "Partner has forced them to perform sexual acts"

* Either the FR or partner (out of hh with children)
label variable gv4 "Percentage of households where children were beaten"

* Drop rows without a survey id (the EL1-only households added by the merge above),
* then bring back the skipped-meal day counts saved earlier in this round.
drop if newid == .
merge 1:1 newid using `missedmeals', nogen

* For the figure, consumption is measured per capita
foreach cvar in c2 c3 {
	replace `cvar' = `cvar'/hhsize
}

local outcomes f1_d_skippedadult f1_d_skippedchild c2 c3

* Run the helper-programs do-file; errors from it are suppressed by capture.
capture do "$do/programs/run_ge_build_programs.do"

* Call wins_topgroup on each outcome by weightgroup; the keep below uses the resulting *_wins variables
foreach y of varlist `outcomes' {
	wins_topgroup `y', by(weightgroup)
}

keep c2_wins c3_wins gv* f1_d_skippedadult_wins f1_d_skippedchild_wins week hh_weight
tab week

save "$dr/KEN1_fig1_hh_`round'.dta", replace

}

* After the loop the data in memory are those of the last round processed (Rd2).
* Stack the saved Rd1 figure data onto them, order by survey week and save the combined file.
append using "$dr/KEN1_fig1_hh_Rd1.dta"
sort week
save "$dr/KEN1_fig1_hh.dta" , replace
